from time import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
from copy import copy
np.set_printoptions(precision=4)
def read_data(type_: str, power_transform=True):
    df = pd.read_csv("x_%s_gr_smpl.csv" % type_).astype(int)
    # label each row: y_<type>_smpl_<k>.csv marks membership of class k with 0,
    # so the flags are inverted (~) before being used as a boolean row index
    for k in range(10):
        index = ~np.loadtxt("y_%s_smpl_%s.csv" % (type_, k), delimiter=",", skiprows=1).astype(bool)
        df.loc[index, 'label'] = k
    x = df.iloc[:, 0:2304].to_numpy()  # 48x48 grey-scale pixels, flattened
    y = df.iloc[:, 2304].to_numpy()    # class label appended above
    # randomise the instance order (seeded, so every run gets the same permutation)
    np.random.seed(42)
    permutation = np.random.permutation(df.shape[0])
    x = x[permutation]
    y = y[permutation]
    if power_transform:
        from sklearn.preprocessing import PowerTransformer
        # Box-Cox requires strictly positive inputs; if any pixel value is 0 this raises
        # a ValueError, in which case method='yeo-johnson' is the usual fallback
        pt = PowerTransformer(method='box-cox')
        pt.fit(x)
        x = pt.transform(x)
    return x, y
train_x, train_y = read_data('train')
test_x, test_y = read_data('test')
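# Illustrative sanity check: each row should have 48*48 = 2304 pixel features, and all
# ten class labels should be present in both splits.
print("train:", train_x.shape, " test:", test_x.shape)
print("classes:", np.unique(train_y))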
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=10)
skf.get_n_splits(train_x, train_y)
B_train_x = train_x
B_train_y = train_y
B_test_x = test_x
B_test_y = test_y
C_train_x = train_x[:-4000]
C_train_y = train_y[:-4000]
C_test_x = np.concatenate((test_x, train_x[-4000:]), axis=0)
C_test_y = np.concatenate((test_y, train_y[-4000:]), axis=0)
D_train_x = train_x[:-9000]
D_train_y = train_y[:-9000]
D_test_x = np.concatenate((test_x, train_x[-9000:]), axis=0)
D_test_y = np.concatenate((test_y, train_y[-9000:]), axis=0)
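# Experiment layouts used below:
#   A - 10-fold stratified cross-validation within the training set only
#   B - the original train/test split
#   C - the last 4000 (shuffled) training rows moved into the test set
#   D - the last 9000 (shuffled) training rows moved into the test set
# Because read_data shuffles with a fixed seed, the rows moved in C and D are a
# reproducible random subset of the training data.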
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
def get_auc_score(y_true, y_pred):
    # support (number of true instances) of each class, used as the AUC weights
    classes = np.arange(10)
    support = np.array([np.count_nonzero(y_true == c) for c in classes])
    # one-hot encode both label vectors for one-vs-rest ROC analysis
    y_true_bin = label_binarize(y_true, classes=classes)
    y_pred_bin = label_binarize(y_pred, classes=classes)
    auc_values = np.empty(10)
    for i in range(10):
        fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_pred_bin[:, i])
        auc_values[i] = auc(fpr, tpr)
    weighted = support * auc_values
    weighted[np.isnan(weighted)] = 0.  # classes absent from y_true yield NaN AUCs
    return float(weighted.sum()) / float(support.sum())
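# Note: get_auc_score receives hard class predictions (argmax labels), not predicted
# probabilities, so each one-vs-rest ROC curve has a single operating point; the value
# returned is a support-weighted average of those single-threshold AUCs.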
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from imblearn.metrics import sensitivity_score, specificity_score
def calculate_scores(true_labels, predicted_labels):
    return {'accuracy': accuracy_score(true_labels, predicted_labels),
            'precision': precision_score(true_labels, predicted_labels, average='weighted'),
            'recall': recall_score(true_labels, predicted_labels, average='weighted'),            # true positive rate
            'specificity': specificity_score(true_labels, predicted_labels, average='weighted'),  # true negative rate
            'f1_score': f1_score(true_labels, predicted_labels, average='weighted'),
            'auc': get_auc_score(true_labels, predicted_labels)}
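# average='weighted' weights each per-class precision/recall/F1 by that class's support,
# so the headline figures are dominated by the most frequent classes; the AUC above is
# weighted the same way.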
iterations = 5
linear_metrics = pd.DataFrame(columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc', 'exp'])
linear_metrics
import time
from sklearn.svm import SVC
def run_linear_classifier(train_x, train_y, test_x, test_y):
    # train a linear-kernel support vector classifier (not a linear regression)
    classifier = SVC(kernel='linear')
    classifier.fit(train_x, train_y)
    # evaluate on the held-out set
    predictions = classifier.predict(test_x)
    scores = calculate_scores(test_y, predictions)
    return scores
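# Note: SVC(kernel='linear') uses libsvm, whose fit time grows roughly quadratically with
# the number of samples. If the full training set is too slow, sklearn.svm.LinearSVC is a
# much faster alternative (not identical: liblinear solver, one-vs-rest, squared hinge),
# e.g. LinearSVC(max_iter=5000) as a drop-in for the classifier above.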
X = train_x
y = train_y
start_time = time.time()
A_linear = []
for train_idx, test_idx in skf.split(X, y):
    # create the per-fold train and test sets
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    scores = run_linear_classifier(X_train, y_train, X_test, y_test)
    A_linear.append(scores)
linear_metrics.loc[len(linear_metrics)] = ['Linear', np.NaN, round(time.time() - start_time), np.NaN, *pd.DataFrame(A_linear).mean().tolist(), 'A']
start_time = time.time()
B_linear = []
for _ in range(iterations):
scores = run_linear_classifier(B_train_x, B_train_y, B_test_x, B_test_y)
B_linear.append(scores)
linear_metrics.loc[len(linear_metrics)] = ['Linear', np.NaN, round(time.time() - start_time), np.NaN, *pd.DataFrame(B_linear).mean().tolist(), 'B']
start_time = time.time()
C_linear = []
for _ in range(iterations):
scores = run_linear_classifier(C_train_x, C_train_y, C_test_x, C_test_y)
C_linear.append(scores)
linear_metrics.loc[len(linear_metrics)] = ['Linear', np.NaN, round(time.time() - start_time), np.NaN, *pd.DataFrame(C_linear).mean().tolist(), 'C']
start_time = time.time()
D_linear = []
for _ in range(iterations):
scores = run_linear_classifier(D_train_x, D_train_y, D_test_x, D_test_y)
D_linear.append(scores)
linear_metrics.loc[len(linear_metrics)] = ['Linear', np.NaN, round(time.time() - start_time), np.NaN, *pd.DataFrame(D_linear).mean().tolist(), 'D']
linear_metrics
from keras.models import Sequential, clone_model
from keras.layers import Dense
default_classifier = Sequential()
default_classifier.name = 'd=[10, 10, 10]'
default_classifier.add(Dense(10, input_dim=2304, activation='relu'))
default_classifier.add(Dense(10, activation='relu'))
default_classifier.add(Dense(10, activation='softmax'))
from keras.optimizers import SGD
default_optimizer = SGD(lr=0.01, nesterov=False)
default_optimizer.name = 'SGD(lr=0.01)'
default_epochs = 200
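# The defaults used throughout: a fully connected 2304-10-10-10 network trained with plain
# SGD (lr=0.01, no momentum) for 200 epochs; the summary makes the parameter counts explicit.
default_classifier.summary()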
def run_classifier(classifier, optimizer, iterate, epochs, train_x, train_y, test_x, test_y, verbose: bool = True, plot_cm=True):
    t_hist_a = pd.DataFrame()  # training accuracy per epoch, one column per iteration
    v_hist_a = pd.DataFrame()  # validation accuracy per epoch
    t_hist_l = pd.DataFrame()  # training loss per epoch
    v_hist_l = pd.DataFrame()  # validation loss per epoch
    out = []
    for i in range(iterate):
        # clone the architecture (fresh weights) and copy the optimizer so each run starts from scratch
        classifier_ = clone_model(classifier)
        optimizer_ = copy(optimizer)
        if verbose:
            print("[%s] Running classifier: %s, %s" % (i + 1, classifier_.name, optimizer_.name))
        classifier_.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer_, metrics=['sparse_categorical_accuracy'])
        history = classifier_.fit(train_x, train_y, epochs=epochs, validation_data=(test_x, test_y), shuffle=False, batch_size=256, verbose=0)
        probabilities = classifier_.predict(test_x, batch_size=1, verbose=0)
        predictions = np.argmax(probabilities, axis=1)
        scores = calculate_scores(test_y, predictions)
        out.append(scores)
        if plot_cm:
            # row-normalised confusion matrix for this iteration
            cm = confusion_matrix(test_y, predictions)
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            df_cm = pd.DataFrame(cm, index=[str(c) for c in range(10)], columns=[str(c) for c in range(10)])
            plt.figure(figsize=(10, 7))
            sn.heatmap(df_cm, annot=True)
            plt.show()
        t_hist_a[str(i)] = history.history['sparse_categorical_accuracy']
        v_hist_a[str(i)] = history.history['val_sparse_categorical_accuracy']
        t_hist_l[str(i)] = history.history['loss']
        v_hist_l[str(i)] = history.history['val_loss']
    return out, t_hist_a, v_hist_a, t_hist_l, v_hist_l
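# Illustrative variant (not used in these experiments): an EarlyStopping callback could cap
# the fixed epoch budget once the validation loss stops improving, e.g.
#   from keras.callbacks import EarlyStopping
#   es = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
#   classifier_.fit(..., callbacks=[es])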
def plot_results(t_hist, v_hist, title, y_label, legend_loc):
    # mean and standard deviation across the per-iteration/per-fold columns
    train_means = t_hist.mean(axis=1)
    train_std = t_hist.std(axis=1)
    val_means = v_hist.mean(axis=1)
    val_std = v_hist.std(axis=1)
    # plot the train and validation curves with a +/- 1 std band
    plt.plot(train_means, color='blue', label='train', linewidth=0.8)
    plt.fill_between(range(t_hist.shape[0]), train_means - train_std, train_means + train_std, alpha=0.15)
    plt.plot(val_means, color='orange', label='validation', linewidth=0.8)
    plt.fill_between(range(v_hist.shape[0]), val_means - val_std, val_means + val_std, alpha=0.15)
    plt.title(title, fontsize=10)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.xticks(fontsize=8)
    plt.yticks(fontsize=8)
    plt.ylim(0.0, 1.0)
    plt.legend(loc=legend_loc)  # use the labels set on the line plots above
    plt.show()
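# Note: the fixed ylim(0, 1) keeps the accuracy plots directly comparable, but it clips
# loss curves that start above 1 (sparse categorical cross-entropy over 10 classes starts
# near ln(10) ~ 2.3 for an untrained network).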
from pylab import rcParams
rcParams['figure.figsize'] = 3, 3
X = train_x
y = train_y
def run_exp_A(c, o, i, e):
    # experiment A: 10-fold stratified cross-validation over the training data (globals X, y, skf)
    t_hist_a = pd.DataFrame()
    v_hist_a = pd.DataFrame()
    t_hist_l = pd.DataFrame()
    v_hist_l = pd.DataFrame()
    scores = []
    for idx, (train_idx, test_idx) in enumerate(skf.split(X, y)):
        # create the per-fold train and test sets
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        scores_, t_hist_a_, v_hist_a_, t_hist_l_, v_hist_l_ = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
                                                                             train_x=X_train, train_y=y_train,
                                                                             test_x=X_test, test_y=y_test,
                                                                             plot_cm=False)
        scores.extend(scores_)
        # keep the first iteration's history for this fold (one column per fold)
        t_hist_a[str(idx)] = t_hist_a_.loc[:, '0']
        v_hist_a[str(idx)] = v_hist_a_.loc[:, '0']
        t_hist_l[str(idx)] = t_hist_l_.loc[:, '0']
        v_hist_l[str(idx)] = v_hist_l_.loc[:, '0']
    return scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l
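# Because skf was created once with the default shuffle=False, every call to run_exp_A
# evaluates its classifier/optimizer on exactly the same 10 folds, so the experiment-A
# comparisons below are paired rather than re-sampled.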
classifier1 = Sequential()
classifier1.name = 'd=[5, 10]'
classifier1.add(Dense(5, input_dim=2304, activation='relu'))
classifier1.add(Dense(10, activation='softmax'))
classifier2 = Sequential()
classifier2.name = 'd=[10, 10]'
classifier2.add(Dense(10, input_dim=2304, activation='relu'))
classifier2.add(Dense(10, activation='softmax'))
classifier3 = Sequential()
classifier3.name = 'd=[30, 10]'
classifier3.add(Dense(30, input_dim=2304, activation='relu'))
classifier3.add(Dense(10, activation='softmax'))
classifier4 = Sequential()
classifier4.name = 'd=[5, 5, 10]'
classifier4.add(Dense(5, input_dim=2304, activation='relu'))
classifier4.add(Dense(5, activation='relu'))
classifier4.add(Dense(10, activation='softmax'))
classifier5 = Sequential()
classifier5.name = 'd=[10, 10, 10]'
classifier5.add(Dense(10, input_dim=2304, activation='relu'))
classifier5.add(Dense(10, activation='relu'))
classifier5.add(Dense(10, activation='softmax'))
classifier6 = Sequential()
classifier6.name = 'd=[30, 30, 10]'
classifier6.add(Dense(30, input_dim=2304, activation='relu'))
classifier6.add(Dense(30, activation='relu'))
classifier6.add(Dense(10, activation='softmax'))
all_classifiers = [classifier1, classifier2, classifier3, classifier4, classifier5, classifier6]
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
a_metrics_1 = pd.DataFrame(columns=columns)
a_metrics_1
from time import time  # rebind time() to the function ("import time" above rebound the name to the module)
i = 1  # number of iterations
e = default_epochs # number of epochs per iteration
o = default_optimizer # default optimizer
for c in all_classifiers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_exp_A(c, o, i, e)
plot_results(t_hist_a, v_hist_a, "A: %s" % c.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "A: %s" % c.name, 'loss', 'upper right')
a_metrics_1.loc[len(a_metrics_1)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
a_metrics_1
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
a_metrics_2 = pd.DataFrame(columns=columns)
a_metrics_2
o1 = SGD(lr=0.0001, nesterov=False)
o1.name = 'SGD(lr=0.0001)'
o2 = SGD(lr=0.001, nesterov=False)
o2.name = 'SGD(lr=0.001)'
o3 = SGD(lr=0.01, nesterov=False)
o3.name = 'SGD(lr=0.01)'
o4 = SGD(lr=0.1, nesterov=False)
o4.name = 'SGD(lr=0.1)'
all_optimizers = (o1, o2, o3, o4)
i = 1 # number of iterations
e = default_epochs
c = default_classifier # default classifier
for o in all_optimizers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_exp_A(c, o, i, e)
plot_results(t_hist_a, v_hist_a, "A: %s" % o.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "A: %s" % o.name, 'loss', 'upper right')
a_metrics_2.loc[len(a_metrics_2)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
a_metrics_2
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
a_metrics_3 = pd.DataFrame(columns=columns)
a_metrics_3
o1 = SGD(lr=0.01, momentum=0.0, nesterov=False)
o1.name = 'SGD(lr=0.01, m=0.0)'
o2 = SGD(lr=0.01, momentum=0.5, nesterov=False)
o2.name = 'SGD(lr=0.01, m=0.5)'
o3 = SGD(lr=0.01, momentum=0.9, nesterov=False)
o3.name = 'SGD(lr=0.01, m=0.9)'
o4 = SGD(lr=0.01, momentum=0.99, nesterov=False)
o4.name = 'SGD(lr=0.01, m=0.99)'
all_optimizers = (o1, o2, o3, o4)
i = 1 # number of iterations
e = default_epochs
c = default_classifier # default classifier
for o in all_optimizers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_exp_A(c, o, i, e)
plot_results(t_hist_a, v_hist_a, "A: %s" % o.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "A: %s" % o.name, 'loss', 'upper right')
a_metrics_3.loc[len(a_metrics_3)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
a_metrics_3
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
a_metrics_4 = pd.DataFrame(columns=columns)
a_metrics_4
i = 1 # number of iterations
e = 1000 # number of epochs per iteration
c = default_classifier
o = default_optimizer
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_exp_A(c, o, i, e)
plot_results(t_hist_a, v_hist_a, "A: %s epochs" % e, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "A: %s epochs" % e, 'loss', 'upper right')
a_metrics_4.loc[len(a_metrics_4)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
a_metrics_4
from time import time
_train_x, _train_y, _test_x, _test_y = B_train_x, B_train_y, B_test_x, B_test_y
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
b_metrics_1 = pd.DataFrame(columns=columns)
b_metrics_1
classifier1 = Sequential()
classifier1.name = 'd=[5, 10]'
classifier1.add(Dense(5, input_dim=2304, activation='relu'))
classifier1.add(Dense(10, activation='softmax'))
classifier2 = Sequential()
classifier2.name = 'd=[10, 10]'
classifier2.add(Dense(10, input_dim=2304, activation='relu'))
classifier2.add(Dense(10, activation='softmax'))
classifier3 = Sequential()
classifier3.name = 'd=[30, 10]'
classifier3.add(Dense(30, input_dim=2304, activation='relu'))
classifier3.add(Dense(10, activation='softmax'))
classifier4 = Sequential()
classifier4.name = 'd=[5, 5, 10]'
classifier4.add(Dense(5, input_dim=2304, activation='relu'))
classifier4.add(Dense(5, activation='relu'))
classifier4.add(Dense(10, activation='softmax'))
classifier5 = Sequential()
classifier5.name = 'd=[10, 10, 10]'
classifier5.add(Dense(10, input_dim=2304, activation='relu'))
classifier5.add(Dense(10, activation='relu'))
classifier5.add(Dense(10, activation='softmax'))
classifier6 = Sequential()
classifier6.name = 'd=[30, 30, 10]'
classifier6.add(Dense(30, input_dim=2304, activation='relu'))
classifier6.add(Dense(30, activation='relu'))
classifier6.add(Dense(10, activation='softmax'))
all_classifiers = [classifier1, classifier2, classifier3, classifier4, classifier5, classifier6]
i = 1 # number of iterations
e = default_epochs
o = default_optimizer # default optimizer
for c in all_classifiers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "B: %s" % c.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "B: %s" % c.name, 'loss', 'upper right')
b_metrics_1.loc[len(b_metrics_1)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
b_metrics_1
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
b_metrics_2 = pd.DataFrame(columns=columns)
b_metrics_2
o1 = SGD(lr=0.0001, nesterov=False)
o1.name = 'SGD(lr=0.0001)'
o2 = SGD(lr=0.001, nesterov=False)
o2.name = 'SGD(lr=0.001)'
o3 = SGD(lr=0.01, nesterov=False)
o3.name = 'SGD(lr=0.01)'
o4 = SGD(lr=0.1, nesterov=False)
o4.name = 'SGD(lr=0.1)'
all_optimizers = (o1, o2, o3, o4)
i = 1 # number of iterations
e = default_epochs
c = default_classifier # default classifier
for o in all_optimizers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "B: %s" % o.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "B: %s" % o.name, 'loss', 'upper right')
b_metrics_2.loc[len(b_metrics_2)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
b_metrics_2
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
b_metrics_3 = pd.DataFrame(columns=columns)
b_metrics_3
o1 = SGD(lr=0.01, momentum=0.0, nesterov=False)
o1.name = 'SGD(lr=0.01, m=0.0)'
o2 = SGD(lr=0.01, momentum=0.5, nesterov=False)
o2.name = 'SGD(lr=0.01, m=0.5)'
o3 = SGD(lr=0.01, momentum=0.9, nesterov=False)
o3.name = 'SGD(lr=0.01, m=0.9)'
o4 = SGD(lr=0.01, momentum=0.99, nesterov=False)
o4.name = 'SGD(lr=0.01, m=0.99)'
all_optimizers = (o1, o2, o3, o4)
i = 1 # number of iterations
e = default_epochs
c = default_classifier # default classifier
for o in all_optimizers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "B: %s" % o.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "B: %s" % o.name, 'loss', 'upper right')
b_metrics_3.loc[len(b_metrics_3)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
b_metrics_3
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
b_metrics_4 = pd.DataFrame(columns=columns)
b_metrics_4
i = 1 # number of iterations
e = 1000 # number of epochs per iteration
c = default_classifier
o = default_optimizer
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "B: %s epochs" % e, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "B: %s epochs" % e, 'loss', 'upper right')
b_metrics_4.loc[len(b_metrics_4)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
b_metrics_4
_train_x, _train_y, _test_x, _test_y = C_train_x, C_train_y, C_test_x, C_test_y
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
c_metrics_1 = pd.DataFrame(columns=columns)
c_metrics_1
classifier1 = Sequential()
classifier1.name = 'd=[5, 10]'
classifier1.add(Dense(5, input_dim=2304, activation='relu'))
classifier1.add(Dense(10, activation='softmax'))
classifier2 = Sequential()
classifier2.name = 'd=[10, 10]'
classifier2.add(Dense(10, input_dim=2304, activation='relu'))
classifier2.add(Dense(10, activation='softmax'))
classifier3 = Sequential()
classifier3.name = 'd=[30, 10]'
classifier3.add(Dense(30, input_dim=2304, activation='relu'))
classifier3.add(Dense(10, activation='softmax'))
classifier4 = Sequential()
classifier4.name = 'd=[5, 5, 10]'
classifier4.add(Dense(5, input_dim=2304, activation='relu'))
classifier4.add(Dense(5, activation='relu'))
classifier4.add(Dense(10, activation='softmax'))
classifier5 = Sequential()
classifier5.name = 'd=[10, 10, 10]'
classifier5.add(Dense(10, input_dim=2304, activation='relu'))
classifier5.add(Dense(10, activation='relu'))
classifier5.add(Dense(10, activation='softmax'))
classifier6 = Sequential()
classifier6.name = 'd=[30, 30, 10]'
classifier6.add(Dense(30, input_dim=2304, activation='relu'))
classifier6.add(Dense(30, activation='relu'))
classifier6.add(Dense(10, activation='softmax'))
all_classifiers = [classifier1, classifier2, classifier3, classifier4, classifier5, classifier6]
i = 1 # number of iterations
e = default_epochs
o = default_optimizer # default optimizer
for c in all_classifiers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "C: %s" % c.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "C: %s" % c.name, 'loss', 'upper right')
c_metrics_1.loc[len(c_metrics_1)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
c_metrics_1
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
c_metrics_2 = pd.DataFrame(columns=columns)
c_metrics_2
o1 = SGD(lr=0.0001, nesterov=False)
o1.name = 'SGD(lr=0.0001)'
o2 = SGD(lr=0.001, nesterov=False)
o2.name = 'SGD(lr=0.001)'
o3 = SGD(lr=0.01, nesterov=False)
o3.name = 'SGD(lr=0.01)'
o4 = SGD(lr=0.1, nesterov=False)
o4.name = 'SGD(lr=0.1)'
all_optimizers = (o1, o2, o3, o4)
i = 1 # number of iterations
e = default_epochs
c = default_classifier # default classifier
for o in all_optimizers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "C: %s" % o.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "C: %s" % o.name, 'loss', 'upper right')
c_metrics_2.loc[len(c_metrics_2)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
c_metrics_2
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
c_metrics_3 = pd.DataFrame(columns=columns)
c_metrics_3
o1 = SGD(lr=0.01, momentum=0.0, nesterov=False)
o1.name = 'SGD(lr=0.01, m=0.0)'
o2 = SGD(lr=0.01, momentum=0.5, nesterov=False)
o2.name = 'SGD(lr=0.01, m=0.5)'
o3 = SGD(lr=0.01, momentum=0.9, nesterov=False)
o3.name = 'SGD(lr=0.01, m=0.9)'
o4 = SGD(lr=0.01, momentum=0.99, nesterov=False)
o4.name = 'SGD(lr=0.01, m=0.99)'
all_optimizers = (o1, o2, o3, o4)
i = 1 # number of iterations
e = default_epochs
c = default_classifier # default classifier
for o in all_optimizers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "C: %s" % o.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "C: %s" % o.name, 'loss', 'upper right')
c_metrics_3.loc[len(c_metrics_3)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
c_metrics_3
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
c_metrics_4 = pd.DataFrame(columns=columns)
c_metrics_4
i = 1 # number of iterations
e = 1000
c = default_classifier
o = default_optimizer
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "C: %s epochs" % e, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "C: %s epochs" % e, 'loss', 'upper right')
c_metrics_4.loc[len(c_metrics_4)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
c_metrics_4
_train_x, _train_y, _test_x, _test_y = D_train_x, D_train_y, D_test_x, D_test_y
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
d_metrics_1 = pd.DataFrame(columns=columns)
d_metrics_1
classifier1 = Sequential()
classifier1.name = 'd=[5, 10]'
classifier1.add(Dense(5, input_dim=2304, activation='relu'))
classifier1.add(Dense(10, activation='softmax'))
classifier2 = Sequential()
classifier2.name = 'd=[10, 10]'
classifier2.add(Dense(10, input_dim=2304, activation='relu'))
classifier2.add(Dense(10, activation='softmax'))
classifier3 = Sequential()
classifier3.name = 'd=[30, 10]'
classifier3.add(Dense(30, input_dim=2304, activation='relu'))
classifier3.add(Dense(10, activation='softmax'))
classifier4 = Sequential()
classifier4.name = 'd=[5, 5, 10]'
classifier4.add(Dense(5, input_dim=2304, activation='relu'))
classifier4.add(Dense(5, activation='relu'))
classifier4.add(Dense(10, activation='softmax'))
classifier5 = Sequential()
classifier5.name = 'd=[10, 10, 10]'
classifier5.add(Dense(10, input_dim=2304, activation='relu'))
classifier5.add(Dense(10, activation='relu'))
classifier5.add(Dense(10, activation='softmax'))
classifier6 = Sequential()
classifier6.name = 'd=[30, 30, 10]'
classifier6.add(Dense(30, input_dim=2304, activation='relu'))
classifier6.add(Dense(30, activation='relu'))
classifier6.add(Dense(10, activation='softmax'))
all_classifiers = [classifier1, classifier2, classifier3, classifier4, classifier5, classifier6]
i = 1 # number of iterations
e = default_epochs
o = default_optimizer # default optimizer
for c in all_classifiers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "D: %s" % c.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "D: %s" % c.name, 'loss', 'upper right')
d_metrics_1.loc[len(d_metrics_1)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
d_metrics_1
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
d_metrics_2 = pd.DataFrame(columns=columns)
d_metrics_2
o1 = SGD(lr=0.0001, nesterov=False)
o1.name = 'SGD(lr=0.0001)'
o2 = SGD(lr=0.001, nesterov=False)
o2.name = 'SGD(lr=0.001)'
o3 = SGD(lr=0.01, nesterov=False)
o3.name = 'SGD(lr=0.01)'
o4 = SGD(lr=0.1, nesterov=False)
o4.name = 'SGD(lr=0.1)'
all_optimizers = (o1, o2, o3, o4)
i = 1 # number of iterations
e = default_epochs
c = default_classifier # default classifier
for o in all_optimizers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "D: %s" % o.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "D: %s" % o.name, 'loss', 'upper right')
d_metrics_2.loc[len(d_metrics_2)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
d_metrics_2
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
d_metrics_3 = pd.DataFrame(columns=columns)
d_metrics_3
o1 = SGD(lr=0.01, momentum=0.0, nesterov=False)
o1.name = 'SGD(lr=0.01, m=0.0)'
o2 = SGD(lr=0.01, momentum=0.5, nesterov=False)
o2.name = 'SGD(lr=0.01, m=0.5)'
o3 = SGD(lr=0.01, momentum=0.9, nesterov=False)
o3.name = 'SGD(lr=0.01, m=0.9)'
o4 = SGD(lr=0.01, momentum=0.99, nesterov=False)
o4.name = 'SGD(lr=0.01, m=0.99)'
all_optimizers = (o1, o2, o3, o4)
i = 1 # number of iterations
e = default_epochs
c = default_classifier # default classifier
for o in all_optimizers:
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "D: %s" % o.name, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "D: %s" % o.name, 'loss', 'upper right')
d_metrics_3.loc[len(d_metrics_3)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
d_metrics_3
columns=['classifier', 'optimizer', 'seconds', 'epochs', 'accuracy', 'precision', 'recall', 'specificity', 'f1_score', 'auc']
d_metrics_4 = pd.DataFrame(columns=columns)
d_metrics_4
i = 1 # number of iterations
e = 1000 # number of epochs per iteration
c = default_classifier
o = default_optimizer
start_time = time()
scores, t_hist_a, v_hist_a, t_hist_l, v_hist_l = run_classifier(classifier=c, optimizer=o, iterate=i, epochs=e,
train_x=_train_x, train_y=_train_y,
test_x=_test_x, test_y=_test_y)
plot_results(t_hist_a, v_hist_a, "D: %s epochs" % e, 'accuracy', 'lower right')
plot_results(t_hist_l, v_hist_l, "D: %s epochs" % e, 'loss', 'upper right')
d_metrics_4.loc[len(d_metrics_4)] = [c.name, o.name, round(time() - start_time), e, *pd.DataFrame(scores).mean().tolist()]
d_metrics_4
a_metrics = pd.concat([a_metrics_1, a_metrics_2, a_metrics_3, a_metrics_4]).reset_index()
b_metrics = pd.concat([b_metrics_1, b_metrics_2, b_metrics_3, b_metrics_4]).reset_index()
c_metrics = pd.concat([c_metrics_1, c_metrics_2, c_metrics_3, c_metrics_4]).reset_index()
d_metrics = pd.concat([d_metrics_1, d_metrics_2, d_metrics_3, d_metrics_4]).reset_index()
d_metrics
linear_metrics
a_metrics['exp'] = 'A'
b_metrics['exp'] = 'B'
c_metrics['exp'] = 'C'
d_metrics['exp'] = 'D'
all_metrics = pd.concat([a_metrics, b_metrics, c_metrics, d_metrics, linear_metrics])
all_metrics.loc[all_metrics.classifier == 'Linear', 'optimizer'] = ""
all_metrics['model'] = all_metrics['classifier']+', '+all_metrics['optimizer']
all_metrics = all_metrics.sort_values(['exp', 'classifier'])
all_metrics
import seaborn as sns
plt.figure(figsize = (12,7))
sns.set(style="white")
ax = sns.barplot(x="model", y="precision", hue="exp", data=all_metrics, palette=['blue', 'orange', 'green', 'red'])
ax.axhline(all_metrics.loc[(all_metrics.classifier == 'Linear') & (all_metrics.exp == 'A'), 'precision'].values[0], c='blue', linestyle='--', linewidth=0.6)
ax.axhline(all_metrics.loc[(all_metrics.classifier == 'Linear') & (all_metrics.exp == 'B'), 'precision'].values[0], c='orange', linestyle='--', linewidth=0.6)
ax.axhline(all_metrics.loc[(all_metrics.classifier == 'Linear') & (all_metrics.exp == 'C'), 'precision'].values[0], c='green', linestyle='--', linewidth=0.6)
ax.axhline(all_metrics.loc[(all_metrics.classifier == 'Linear') & (all_metrics.exp == 'D'), 'precision'].values[0], c='red', linestyle='--', linewidth=0.6)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()
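# The dashed horizontal lines mark the linear SVM baseline for each experiment
# (colour-matched to the A/B/C/D bar hues): a bar clearing its dashed line means that
# network configuration beat the linear classifier on the same split.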
# create train and test sets
train_x, test_x = B_train_x, B_test_x
train_y, test_y = B_train_y, B_test_y
# build and fit model
default_classifier.compile(loss='sparse_categorical_crossentropy', optimizer=default_optimizer, metrics=['sparse_categorical_accuracy'])
default_classifier.fit(train_x, train_y, epochs=200, validation_data=(test_x, test_y), shuffle=False, batch_size=256, verbose=1)
scores = default_classifier.predict(test_x, batch_size=1, verbose=0)
predictions = np.array([np.argmax(x) for x in scores])
# find misclassified images
misclassified = []
for i in range(len(predictions)):
if test_y[i] != predictions[i]:
misclassified.append(i)
print("model misclassified %s out of %s images" % (len(misclassified), len(predictions)))
import matplotlib.pyplot as plt
def plot_image(i, predictions_array, true_label, img):
    true_label, img = true_label[i], img[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(img.reshape(48, 48), cmap=plt.cm.binary)
    predicted_label = np.argmax(predictions_array)
    # caption the image with predicted vs true class, blue if correct and red if not
    color = 'blue' if predicted_label == true_label else 'red'
    plt.xlabel("pred: %d  true: %d" % (predicted_label, true_label), color=color)
def plot_value_array(i, predictions_array, true_label):
    true_label = true_label[i]
    plt.grid(False)
    plt.xticks(range(10))
    plt.yticks([])
    thisplot = plt.bar(range(10), predictions_array, color="#777777")
    plt.ylim([0, 1])
    predicted_label = np.argmax(predictions_array)
    thisplot[predicted_label].set_color('red')   # predicted class
    thisplot[int(true_label)].set_color('blue')  # true class
n = 20
rc = np.random.choice(misclassified, n)
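# For each sampled misclassification, show the image (left) next to the softmax output
# (right): the red bar marks the predicted class, the blue bar the true class.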
for i in rc:
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, scores[i], test_y, test_x)
plt.subplot(1,2,2)
plot_value_array(i, scores[i], test_y)
plt.show()